
//create analysis sample//



use "$data_in/ED_Visits", clear //Raw data from CAPER file

egen provID = group(provnpi1)

*Keep ED visits through 2016 so can observe through 2017
gen year = year(encdate)
keep if inrange(year, 2008,2016)

*include active Army Navy and AF hospitals and clinics in the US
rename dmisid DMISID
sort DMISID
merge m:1 DMISID using "$lookup/dmis", keep(3) keepus(DMISParent FacilityTypeCode  FacilityFIPS) nogen
egen clinic = group(DMISParent)
keep if FacilityTypeCode == "CLINIC" | FacilityTypeCode == "HOSP"
keep if FacilityFIPS == "US"







*only 1 ED visit per day per patient
bys patient encdate (encounter_key): keep if _n ==1

*drop extreme ED users
bys patient (encdate): gen N = _N
drop if N > 10
drop N

save "$data_out/temp_sample", replace


//Identify visits with an opioid prescription
use "$data_out/temp_sample", clear
keep patient encdate
bys patient (encdate): gen n = _n
reshape wide encdate, i(patient) j(n)


do "$Construction/prescriptions_data"
restore
merge 1:m patient using "$data_out/opioid_prescriptions", keep (3) nogen keepus(deanum datedisp decqty daysuply) // opioid_prescriptions is the raw data from the PDTS files


//manual reshape due to memory constraints
forval i = 1/10 {
preserve
keep patient encdate`i' deanum datedisp daysuply decqty 
rename encdate`i' encdate
drop if mi(encdate)
save "$data_out/temp_`i'", replace
restore
}

clear
gen num = .
gen days_prior = .
forval i = 1/10 {
append using "$data_out/temp_`i'"
replace days_prior = datedisp - encdate if mi(days_prior)
keep if inrange(days_prior, -180,730)
replace num = `i' if mi(num)

}
format datedisp %d



sort patient encdate

egen encounter_group = group(patient num)

*group by time period - want to leave out first 7 days
cap drop temp* opioid_days
gen temp = -1 if days_prior < 0
replace temp = 0 if inrange(days_prior, 0, 7)
replace temp = 1 if days_prior > 7 & days_prior < 180
replace temp = 2 if days_prior >= 180 & days_prior <= 365
replace temp = 3 if days_prior > 365 & days_prior <= 730
gen temp_prescription = 1 if temp == 0
bys encounter_group: egen ed_prescription = max(temp_prescription)
egen temp2 = group(encounter_group temp)
*total supply for each grouping
bys temp2: egen opioid_days = total(daysuply)
gen temp_previous = opioid_days if temp == -1

*create variables listing how much use in each time period exlcuding first 7 days
bys encounter_group: egen previous_use = max(temp_previous)
recode previous_use . =0
  **first 6 months (temp = 1)

 gen temp_days_supply_initial = opioid_days if temp == 0

gen temp_days_supply_180 = opioid_days if temp == 1
gen temp_days_supply_365 = opioid_days if temp == 2
gen temp_days_supply_730 = opioid_days if temp == 3

bys encounter_group: egen following_use_initial = max(temp_days_supply_initial)
bys encounter_group: egen following_use_180 = max(temp_days_supply_180)
bys encounter_group: egen following_use_365 = max(temp_days_supply_365)
bys encounter_group: egen following_use_730 = max(temp_days_supply_730)

*identify number of unique providers (6 months and 1 year)
bys encounter_group deanum: gen temp_prov_180 = 1 if _n==1 & temp == 1
bys encounter_group deanum: gen temp_prov_365 = 1 if _n==1 & inlist(temp,1,2)

bys encounter_group: egen num_providers_180 = total(temp_prov_180)
bys encounter_group: egen num_providers_365 = total(temp_prov_365)


*create variable indicating long term use
gen refill = 1 if following_use_180 > 0 & following_use_180 <.

gen long_term_use1 = 1 if following_use_365 > 0 & following_use_365 <.
gen long_term_use2 = 1 if following_use_730 > 0 & following_use_730 <.

*create variable indicating the number of fills
gen script = 1
bys temp2: egen total_scripts = total(script)


gen temp_script_180 = total_scripts if temp == 1
gen temp_script_365 = total_scripts if temp == 2
gen temp_script_730 = total_scripts if temp == 3

bys encounter_group: egen scripts_180 = max(temp_script_180) 
bys encounter_group: egen scripts_365 = max(temp_script_365)
bys encounter_group: egen scripts_730 = max(temp_script_730)


save "$data_out/temp_sample2", replace

use "$data_out/temp_sample2", clear

*merge back in with main sample and recode missing to zero
keep patient encdate encounter_group previous_use following_use*  num_providers* scripts_* ed_prescription long_term_use* refill
bys patient encdate: keep if _n ==1
merge 1:1 patient encdate using "$data_out/temp_sample", nogen
recode previous_use .=0
recode following_use_initial .=0
recode following_use_180 .=0
recode following_use_365 .=0
recode num_providers_180 .=0
recode num_providers_365 .=0

keep if previous == 0
recode ed_prescription .=0
recode refill .=0
recode long_term_use1 .=0
recode long_term_use2 .=0

recode scripts_180 .=0
recode scripts_365 .=0
recode scripts_730 .=0



*drop overlap in opioid prescription period (7 days)
bys patient (encdate): gen lag = encdate-encdate[_n-1]
drop if lag <=7



*drop if missing provider info 
drop if mi(provnpi1)



//generate first stage outcomes

gen year_use_days = following_use_180 + following_use_365
gen day_180 = year_use_days >=180
gen prov7 = num_providers_365 >=7





keep patient encdate ed_prescription year_use_days  long_term_use* scripts_* dx1 provnpi1 provID ///
		  clinic year dx1 num_providers_365 day_180 prov7 refill
egen visitID = group(patient encdate)

gen quarter = qofd(encdate)
format quarter %tq
rename patient pid_pde
sort pid_pde quarter

//merge in demographic information

merge m:1 pid_pde quarter using "$data_out/demographics_varying", keep(3) nogen
merge m:1 pid_pde using "$data_out/demographics_nonvarying", keep(3) nogen

//identify rank at time of visit
merge m:1 pid_pde using "$data_out/rank.dta", keep(1 3) nogen

gen begin_rank = rank1
gen rank_date = dor1

forval i = 2/15 {
replace rank_date = dor`i' if dor`i' > rank_date & dor`i' <= encdate
replace begin_rank = rank`i' if rank_date == dor`i'
}




*create demographic coded variables

gen age = floor((encdate-birthday)/365.25)
egen age_bin = cut(age), at(10,20,30,40,50,60,70,80)

gen married = mrtl_stat == "M"
gen race_white = race_cd == "005"
gen junior_enlisted = inlist(begin_rank,"E01", "E02", "E03", "E04")
gen officer = inlist(substr(begin_rank,1,1), "W", "O")
destring edu_lvl, replace
gen college = edu_lvl >= 51 & edu_lvl <.
egen mos = group(pri_dod_occ service)

//bring in discharge dates


merge m:1 pid_pde using "$data_out/discharge_sample", keep(3) nogen

//drop any encounters that aren't during active service period
keep if inrange( encdate,begin_service,discharge_date)
drop if rank_date > encdate
replace afqt_pct = .  if afqt_pct == 0
drop if mi(afqt_pct)



//drop low-density providers
bys provID year: keep if _N >= 10


save "$data_out/naive", replace






